home *** CD-ROM | disk | FTP | other *** search
- /* Bob Boonstra
- * Solution strategy
- * Factoring is a field which has been the subject of a
- * great deal of research because of the implications for
- * cryptography, especially techniques that depend on
- * the difficulty of factoring very large numbers.
- * Therefore, it is possible that some of these
- * algorithms could be applied to the challenge.
- *
- * However, in the event that no mathematician
- * specializing in the field chooses to enter the
- * Challenge, this relatively simple solution takes
- * advantage of some of the simplifying conditions in the
- * problem statement:
- * 1) the numbers are relatively small (64 bits, or
- * ~<20 digits)
- * 2) the prime factors are even smaller (32 bits, or
- * ~<10 digits)
- *
- * This solution depends on no precomputed information.
- * It is based on Fermat's algorithm, described in Knuth
- * Vol II, which is especially well suited to the problem
- * because it is most efficient when the two p,
- *
- * Fermat's algorithm requires ~(p-1)sqrt(n) iterations,
- * where n=u*v and u~=p*sqrt(n), v~=sqrt(n)/p.
- * Other algorithms require half as many iterations, but
- * require more calculation per iteration.
- *
- * Fermat's algorithm works as follows:
- * 1) Let n - u*v, u and v odd primes.
- * 2) Set a = (u+v)/2 and b = (u-v)/2.
- * 3) Then n = uv = a**2 - b**2
- * 4) Initialize a = trunc(sqrt(n)), b=0, r=a**2-b**2-n
- * 5) Iterate looking for r==0, with an inner loop that
- * keeps a=(u+v)/2 constant and increases b=(u-v)/2
- * by 1 each iteration until r becomes negative. When
- * this happens, the halfsum a is increased by 1, and
- * the difference loop is repeated.
- *
- * The algorithm in Knuth uses auxiliary variables x,y
- * for efficiency, where:
- * x = 2*a+1 and y = 2*b+1
- * This works fine in most cases, but causes overflow
- * of a longword when x,y are the full 32-bits in size.
- * So we have augmented the algorithm to deal with this
- * case.
- *
- * This solution also uses an efficient integer sqrt
- * algorithm due to Ron Hunsinger, and extends that
- * algorithm to 64 bits.
- */
-
- #pragma options(assign_registers,honor_register)
-
- #define ulong unsigned long
- #define ushort unsigned short
-
- #define kLo16Bits 0xFFFF
- #define kHiBit 0x80000000UL
- #define kLo2Bits 3
- #define kLo1Bit 1
-
- /*
- * Macros RightShift2 and RightShift1 shift a 64-bit value
- * right by 2 and 1 bits, respectively.
- */
- #define RightShift32By2(xL,xH) \
- { \
- xL >>= 2; \
- xL |= (xH & kLo2Bits)<<30; \
- xH >>= 2; \
- }
-
- #define RightShift32By1(xL,xH) \
- { \
- xL >>= 1; \
- xL |= (xH & kLo1Bit)<<31; \
- xH >>= 1; \
- }
-
- /*
- * Macros Add32To64 (Sub32From64) add (subtract) a 32-bit
- * value to (from) a 64-bit value.
- */
- #define Add32To64(rL,rH, a) \
- temp = rL; \
- if ((rL += a) < temp) ++rH;
-
- #define Add2NPlus1To64(lowR,highR,a) \
- Add32To64(lowR,highR,a); \
- Add32To64(lowR,highR,a); \
- Add32To64(lowR,highR,1);
-
- #define Sub32From64(rL,rH, s) \
- temp = rL; \
- if ((rL -= s) > temp) --rH;
-
- #define Sub2NPlus1From64(lowR,highR,s) \
- Sub32From64(lowR,highR,s); \
- Sub32From64(lowR,highR,s); \
- Sub32From64(lowR,highR,1);
-
- /*
- * Macros Add64 (Sub64) add (subtract) two 64-bit values.
- */
- #define Add64(qL,qH, eL,eH) \
- Add32To64(qL,qH,eL); \
- qH += eH;
-
- #define Sub64(qL,qH, eL,eH) \
- Sub32From64(qL,qH, eL); \
- qH -= eH;
-
- /*
- * Macro Square64 multiplies a 32-bit value by itself to
- * produce the square as a 64-bit value. For this solution,
- * we only need to execute this macro expansion once.
- */
- #define Square64(a,rL,rH,temp) \
- { \
- ulong lohi,lolo,hihi; \
- ushort aHi,aLo; \
- \
- aHi = a>>16; \
- aLo = a; \
- \
- rL = (lolo = (ulong)aLo*aLo)&kLo16Bits; \
- lohi = (ulong)aLo*aHi; \
- \
- temp = ((lohi&kLo16Bits)<<1) + (lolo>>16); \
- rL |= temp<<16; \
- \
- temp>>=16; \
- temp += ((hihi = (ulong)aHi*aHi)&kLo16Bits) + \
- (lohi>>(16-1)); \
- rH = temp&kLo16Bits; \
- \
- temp>>=16; \
- temp += hihi>>16; \
- rH |= temp<<16; \
- }
-
- /*
- * Macros LessEqualZero64 and EqualZero64 determine if
- * 64-bit (signed) values are <= 0 or == 0, respectively.
- */
- #define LessEqualZero64(vL,vH) \
- ( (0>(long)vH) || ((0==vH) && (0==vL)) )
-
- #define EqualZero64(vL,vH) \
- ((0==vL) && (0==vH))
-
- /*
- * Macro LessEqual64 determines if one 64-bit quantity is
- * less than or equal to another.
- */
- #define LessEqual64(uL,uH, vL,vH) \
- ( (uH< vH) || ((uH==vH) && (uL<=vL)))
-
- /*
- * Function prototypes.
- */
- ulong sqrt64 (ulong nLo,ulong nHi);
- void Factor64(ulong lowHalf,ulong highHalf,
- ulong *prime1Ptr,ulong *prime2Ptr);
-
- /*
- * The solution ...
- */
- /*********************************************************
- * *
- * Factor64 *
- * *
- *********************************************************/
- void Factor64(lowHalf,highHalf,prime1Ptr,prime2Ptr)
- unsigned long lowHalf,highHalf;
- unsigned long *prime1Ptr,*prime2Ptr;
- {
- register ulong x,y,lowR,highR;
- register ulong temp;
- ulong sqrtN;
-
- /*
- * Fermat's algorithm (Knuth)
- *
- * Assume n=u*v, u<v, n odd, u,v odd
- * Let a=(u+v)/2 b=(u-v)/2 n=a**2-b**2 0<=y<x<=n
- * Search for a,b that satisfy x**2-y**2-n==0
- *
- * NOTE: u,v given as being < 2**32 (fit in one word).
- * Therefore a,b also are < 2**32 (and fit in one word).
- *
- * C1: Set x=2*floor(srt(n))+1,
- * y=1,
- * r=floor(sqrt(n))**2-n
- * x corresponds to 2a+1, y to 2b+1, r to a**2-b**2-n
- *
- * C2: if r<=0 goto C4
- * (algorithm modified to keep r positive)
- * C3: r=r-y, y=y+2,
- * goto C2
- * C4: if (r==0) terminate with n = p*q,
- * p=(x-y)/2, q=(x+y-2)/2
- * C5: r=r+x, x=x+2,
- * goto C3
- *
- * This solution modifies the algorithm to:
- * (1) reorder arithmetic on r to keep it positive
- * (2) extend r to 64 bits when necessary
- * (3) handle the trivial case where one of the primes is 2.
- */
- /*
- * Handle the trivial case with an even prime factor.
- */
- if (0 == (lowHalf&1)) {
- *prime1Ptr = 2;
- *prime2Ptr = (lowHalf>>1) | ((highHalf&kLo1Bit)<<31);
- return;
- }
- /*
- * Compute truncated square root of input 64-bit number.
- */
- sqrtN = temp = sqrt64(lowHalf,highHalf);
- Square64(temp,lowR,highR,y);
- /*
- * Initialize r to s*s - n,
- * but calculate n-s*s to keep r positive, and fix
- * later when it is time to add x to r by calculating
- * r = x - (n-s*s).
- */
- Sub64(lowHalf,highHalf, lowR,highR);
- /*
- * Handle perfect square case.
- */
- if ((0==highHalf) && (0==lowHalf)) {
- *prime1Ptr = *prime2Ptr = sqrtN;
- return;
- }
- y = 1;
- highR = 0;
- /*
- * Separate out the overflow case where x=2a+1 does not
- * fit into a long word.
- */
- if ((temp=sqrtN) >= kHiBit-1) goto doLargeX;
- /*
- * If sqrt(n) < 0x80000000, then 2*sqrt(n)+1 fits in one
- * long word. Also, n-trunc(sqrt(n))**2 < 2*trunc(sqrt(n))
- * also fits in a long word.
- */
- x = 1+2*temp;
- lowR = x - lowHalf;
- x += 2;
- do {
- C2:
- if (lowR<=y) break;
- lowR -= y;
- y += 2;
- } while (true); /* exit when r<=y */
- C4:
- if (y==lowR) {
- *prime1Ptr = (x-y-2)>>1;
- *prime2Ptr = (x+y)>>1;
- return;
- }
- lowR += (x-y);
- y += 2;
- /*
- * Fall through to modified algorithm if x overflows a
- * long word.
- */
- if ((x += 2) < (ulong)0xFFFFFFFF-2) goto C2;
- /*
- * Adjust x and y to guarantee they will not overflow.
- * This requires some extra arithmetic to add 2*a+1 and
- * subtract 2*b+1, but that is preferable to using two
- * longs to represent each of x and y.
- */
- x>>=1;
- y>>=1;
- goto C3L;
-
- doLargeX:
- /*
- * x=2*a+1 no longer fits in 32 bits, so we sacrifice a
- * little loop efficiency and let x=a.
- * Likewise, we let y=b instead of 2*b+1.
- */
- lowR = x = temp;
- Add32To64(lowR,highR,x);
- Sub64(lowR,highR, lowHalf,highHalf);
- ++x;
- do {
- if ( LessEqualZero64(lowR,highR) ) break;
- C3L:
- Sub2NPlus1From64(lowR,highR,y);
- ++y;
- } while (true); /* exit when lowR,highR<=0 */
- C4L:
- if ( EqualZero64(lowR,highR)) {
- *prime1Ptr = x-y;
- *prime2Ptr = x+y;
- return;
- }
- Add2NPlus1To64(lowR,highR,x);
- ++x;
- goto C3L;
- }
-
- /*********************************************************
- * *
- * sqrt64 *
- * *
- *********************************************************/
- /*
- * sqrt_max4pow is the largest power of 4 that can be
- * represented in an unsigned long.
- */
- #define sqrt_max4pow (1UL << 30)
- /*
- * undef sqrt_truncate if rounded sqrts are desired; for
- * the factoring problem we want truncated sqrts.
- */
- #define sqrt_truncate
-
- /*
- * sqrt64 is based on code posted by Ron Hunsinger
- * to comp.sys.mac.programmer. Modified to handle 64-bit
- * values.
- */
- ulong sqrt64 (ulong lowN, ulong highN) {
- /*
- * Compute the integer square root of the integer argument
- * n. Method is to divide n by x computing the quotient x
- * and remainder r. Notice that the divisor x is changing
- * as the quotient x changes.
- *
- * Instead of shifting the dividend/remainder left, we
- * shift the quotient/divisor right. The binary point
- * starts at the extreme left, and shifts two bits at a
- * time to the extreme right.
- *
- * The residue contains n-x**2.
- * Since (x + 1/2)**2 == x**2 + x + 1/4,
- * n - (x + 1/2)**2 == (n - x**2) - (x + 1/4)
- * Thus, we can increase x by 1/2 if we decrease (n-x**2)
- * by (x+1/4)
- */
- register ulong lowResidue,highResidue; /* n - x**2 */
- register ulong lowRoot,highRoot; /* x + 1/4 */
- register ulong half; /* 1/2 */
- ulong highhalf,lowhalf,temp;
-
- lowResidue = lowN;
- if (0 != (highResidue = highN)) {
- /*
- * This code extends the original algorithm from 32 bits to
- * 64 bits. It parallels the 32-bit code; see below for
- * comments.
- */
- highRoot = sqrt_max4pow; lowRoot = 0;
- while (highRoot>highResidue)
- RightShift32By2(lowRoot,highRoot);
- Sub64(lowResidue,highResidue, lowRoot,highRoot);
- /*
- * The binary point for half is now in the high order of
- * two 32-bit words representing the 64-bit value.
- */
- lowhalf = lowRoot; highhalf = highRoot;
- RightShift32By2(lowhalf,highhalf);
- Add64(lowRoot,highRoot, lowhalf,highhalf);
- if (0==highhalf) goto sqrt2;
- half = highhalf<<1;
- do {
- if (LessEqual64(lowRoot,highRoot,lowResidue,
- highResidue)) {
- highResidue -= highRoot;
- highRoot += half;
- }
- if (0 == (half>>=2)) {
- half = sqrt_max4pow<<1;
- goto sqrt2a;
- }
- highRoot -= half;
- highRoot >>= 1;
- } while (true);
- sqrt2:
- /*
- * The binary point for half is now in the lower of the
- * two 32-bit words representing the 64-bit value.
- */
- half = lowhalf<<1;
- do {
- if ((0==highResidue) && (0==highRoot)) goto sqrt3;
- if (LessEqual64(lowRoot,highRoot,lowResidue,
- highResidue)) {
- Sub64(lowResidue,highResidue, lowRoot,highRoot);
- Add32To64(lowRoot,highRoot,half);
- }
- half >>= 2;
- sqrt2a:
- Sub32From64(lowRoot,highRoot,half);
- RightShift32By1(lowRoot,highRoot);
- } while (half);
- } else /* if (0 == highResidue) */ {
- #ifndef sqrt_truncate
- if (lowResidue <= 12)
- return (0x03FFEA94 >> (lowResidue *= 2)) & 3;
- #else
- if (lowResidue <= 15)
- return (0xFFFEAA54 >> (lowResidue *= 2)) & 3;
- #endif
- lowRoot = sqrt_max4pow;
- while (lowRoot>lowResidue) lowRoot>>=2;
-
- /* Decrease (n-x**2) by (0+1/4) */
- lowResidue -= lowRoot;
- /* 1/4, with binary point shifted right 2 */
- half = lowRoot >> 2;
- /* x=1. (lowRoot is now (x=1)+1/4.) */
- lowRoot += half;
- /* 1/2, properly aligned */
- half <<= 1;
-
- /* Normal loop (there is at least one iteration remaining)*/
- do {
- sqrt3:
- if (lowRoot <= lowResidue) {
- /*
- * Whenever we can, decrease (n-x**2) by (x+1/4)
- */
- lowResidue -= lowRoot;
- lowRoot += half;
- }
- /* Shift binary point 2 places right */
- half >>= 2;
- /* x{+1/2}+1/4 - 1/8 == x{+1/2}+1/8 */
- lowRoot -= half;
- /* 2x{+1}+1/4, shifted right 2 places */
- lowRoot >>= 1;
- /* When 1/2 == 0, bin point is at far right */
- } while (half);
- }
- #ifndef sqrt_truncate
- if (lowRoot < lowResidue) ++lowRoot;
- #endif
- /*
- * Return value guaranteed to be correctly rounded
- * (or truncated)
- */
- return lowRoot;
- }
-